{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time Series Basics with Pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n",
    "</br>\n",
    "In this notebook, I'm going to talk about time series basics with Pandas.\n",
    "</br>\n",
    "Happy learning 🐱‍🏍 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## What is the time series?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "date=[datetime(2020,1,5),\n",
    "      datetime(2020,1,10),\n",
    "      datetime(2020,1,15),\n",
    "      datetime(2020,1,20),\n",
    "      datetime(2020,1,25)] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-05    1.021772\n",
       "2020-01-10    0.132016\n",
       "2020-01-15   -0.838591\n",
       "2020-01-20   -0.749657\n",
       "2020-01-25    1.043829\n",
       "dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts=pd.Series(np.random.randn(5),index=date)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2020-01-05', '2020-01-10', '2020-01-15', '2020-01-20',\n",
       "               '2020-01-25'],\n",
       "              dtype='datetime64[ns]', freq=None)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.index "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Time Series Data Structures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2020-01-01 00:00:00')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.to_datetime(\"01/01/2020\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='datetime64[ns]', freq=None)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates=pd.to_datetime(\n",
    "    [datetime(2020,7,5),\n",
    "     \"6th of July, 2020\",\n",
    "     \"2020-Jul-7\",\n",
    "     \"20200708\"])\n",
    "dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PeriodIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='period[D]', freq='D')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates.to_period(\"D\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TimedeltaIndex(['0 days', '1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates-dates[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Creating a Time Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18',\n",
       "               '2020-08-19', '2020-08-20', '2020-08-21', '2020-08-22',\n",
       "               '2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26',\n",
       "               '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30',\n",
       "               '2020-08-31', '2020-09-01'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range(\"2020-08-15\",\"2020-09-01\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2020-07-15', '2020-07-16', '2020-07-17', '2020-07-18',\n",
       "               '2020-07-19', '2020-07-20', '2020-07-21', '2020-07-22',\n",
       "               '2020-07-23', '2020-07-24'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range('2020-07-15', periods=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2020-07-15 00:00:00', '2020-07-15 01:00:00',\n",
       "               '2020-07-15 02:00:00', '2020-07-15 03:00:00',\n",
       "               '2020-07-15 04:00:00', '2020-07-15 05:00:00',\n",
       "               '2020-07-15 06:00:00', '2020-07-15 07:00:00',\n",
       "               '2020-07-15 08:00:00', '2020-07-15 09:00:00'],\n",
       "              dtype='datetime64[ns]', freq='H')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range(\"2020-07-15\",\n",
    "              periods=10,\n",
    "              freq=\"H\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PeriodIndex(['2020-10', '2020-11', '2020-12', '2021-01', '2021-02', '2021-03',\n",
       "             '2021-04', '2021-05', '2021-06', '2021-07'],\n",
       "            dtype='period[M]', freq='M')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.period_range(\"2020-10\", \n",
    "                periods=10,\n",
    "                freq=\"M\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TimedeltaIndex(['0 days 00:00:00', '0 days 01:00:00', '0 days 02:00:00',\n",
       "                '0 days 03:00:00', '0 days 04:00:00', '0 days 05:00:00',\n",
       "                '0 days 06:00:00', '0 days 07:00:00'],\n",
       "               dtype='timedelta64[ns]', freq='H')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.timedelta_range(0,periods=8,freq=\"H\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2020-01-10 00:00:00')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stamp=ts.index[1]\n",
    "stamp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.1320158576081964"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts[stamp]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0438287426871447"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts[\"25.1.2020\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0438287426871447"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts[\"20200125\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-01   -0.563091\n",
       "2020-01-02   -0.696792\n",
       "2020-01-03   -1.171112\n",
       "2020-01-04    0.140425\n",
       "2020-01-05    1.861661\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "long_ts=pd.Series(\n",
    "    np.random.randn(1000),\n",
    "    index=pd.date_range(\"1/1/2020\",\n",
    "                        periods=1000))\n",
    "long_ts.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-01   -0.563091\n",
       "2020-01-02   -0.696792\n",
       "2020-01-03   -1.171112\n",
       "2020-01-04    0.140425\n",
       "2020-01-05    1.861661\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "long_ts[\"2020\"].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-10-01   -0.074736\n",
       "2020-10-02   -0.376618\n",
       "2020-10-03    0.846641\n",
       "2020-10-04    0.625867\n",
       "2020-10-05   -0.723827\n",
       "2020-10-06    0.588691\n",
       "2020-10-07   -0.105803\n",
       "2020-10-08   -2.083312\n",
       "2020-10-09    1.587779\n",
       "2020-10-10   -1.055614\n",
       "2020-10-11   -1.249746\n",
       "2020-10-12   -0.501513\n",
       "2020-10-13   -1.527727\n",
       "2020-10-14    0.757313\n",
       "2020-10-15    1.649080\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "long_ts[\"2020-10\"].head(15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2022-09-20   -0.544946\n",
       "2022-09-21    0.724151\n",
       "2022-09-22   -0.733824\n",
       "2022-09-23    0.809910\n",
       "2022-09-24   -1.342181\n",
       "2022-09-25    0.311458\n",
       "2022-09-26   -1.724693\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "long_ts[datetime(2022,9,20):] "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## The Important Methods Used in Time Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-05    1.021772\n",
       "2020-01-10    0.132016\n",
       "2020-01-15   -0.838591\n",
       "2020-01-20   -0.749657\n",
       "2020-01-25    1.043829\n",
       "dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-05    1.021772\n",
       "2020-01-10    0.132016\n",
       "2020-01-15   -0.838591\n",
       "dtype: float64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.truncate(after=\"1/15/2020\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "date=pd.date_range(\"1/1/2020\",\n",
    "                   periods=100,\n",
    "                   freq=\"W-SUN\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2020-01-05</th>\n",
       "      <td>0.298570</td>\n",
       "      <td>0.541989</td>\n",
       "      <td>0.270855</td>\n",
       "      <td>0.812892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-01-12</th>\n",
       "      <td>0.559454</td>\n",
       "      <td>0.052274</td>\n",
       "      <td>-0.129981</td>\n",
       "      <td>-1.355922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-01-19</th>\n",
       "      <td>-0.936940</td>\n",
       "      <td>-1.359541</td>\n",
       "      <td>0.060583</td>\n",
       "      <td>0.055678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-01-26</th>\n",
       "      <td>1.413215</td>\n",
       "      <td>-1.267353</td>\n",
       "      <td>0.260763</td>\n",
       "      <td>1.158904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-02-02</th>\n",
       "      <td>-0.480138</td>\n",
       "      <td>1.104503</td>\n",
       "      <td>0.925559</td>\n",
       "      <td>-0.183879</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2020-01-05  0.298570  0.541989  0.270855  0.812892\n",
       "2020-01-12  0.559454  0.052274 -0.129981 -1.355922\n",
       "2020-01-19 -0.936940 -1.359541  0.060583  0.055678\n",
       "2020-01-26  1.413215 -1.267353  0.260763  1.158904\n",
       "2020-02-02 -0.480138  1.104503  0.925559 -0.183879"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "long_df=pd.DataFrame(np.random.randn(100,4),\n",
    "                    index=date,\n",
    "                    columns=list(\"ABCD\"))\n",
    "long_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2020-10-04</th>\n",
       "      <td>0.273074</td>\n",
       "      <td>0.634981</td>\n",
       "      <td>-0.887838</td>\n",
       "      <td>0.148878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-10-11</th>\n",
       "      <td>0.108231</td>\n",
       "      <td>-0.053346</td>\n",
       "      <td>-0.520075</td>\n",
       "      <td>-1.125796</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-10-18</th>\n",
       "      <td>-0.757139</td>\n",
       "      <td>-0.287251</td>\n",
       "      <td>-1.123849</td>\n",
       "      <td>1.163562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-10-25</th>\n",
       "      <td>0.550777</td>\n",
       "      <td>0.295018</td>\n",
       "      <td>0.622508</td>\n",
       "      <td>-0.551081</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2020-10-04  0.273074  0.634981 -0.887838  0.148878\n",
       "2020-10-11  0.108231 -0.053346 -0.520075 -1.125796\n",
       "2020-10-18 -0.757139 -0.287251 -1.123849  1.163562\n",
       "2020-10-25  0.550777  0.295018  0.622508 -0.551081"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "long_df[\"2020-10\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-01    0\n",
       "2020-01-02    1\n",
       "2020-01-02    2\n",
       "2020-01-02    3\n",
       "2020-01-03    4\n",
       "dtype: int32"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "date=pd.DatetimeIndex(\n",
    "    [\"1/1/2020\",\"1/2/2020\",\"1/2/2020\",\n",
    "     \"1/2/2020\",\"1/3/2020\"])\n",
    "ts1=pd.Series(np.arange(5),index=date)\n",
    "ts1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts1.index.is_unique "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "group=ts1.groupby(level=0) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-01    1\n",
       "2020-01-02    3\n",
       "2020-01-03    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-01    0\n",
       "2020-01-02    2\n",
       "2020-01-03    4\n",
       "dtype: int32"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
